In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [5]:
import os
In [6]:
os.chdir('D:\Application of software')
In [7]:
df=pd.read_csv("D:/Application of software/ab40c054-5031-4376-b52e-9813e776f65e.csv")
In [8]:
df
Out[8]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Telangana Tripura Uttar Pradesh Uttarakhand West Bengal1 Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.00 11063.00 143175.00 247144.00 158074.00 42367.00 615606.00 297539.00 ... 359433.00 19208.00 724049.00 115523.00 NaN 3979.00 18768.00 343767.00 16818.00 8736039.00
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.00 12547.00 156864.00 282368.00 177511.00 38120.00 724495.00 347032.00 ... 401493.00 21663.00 822903.00 131835.00 NaN 4421.00 21609.00 391238.00 18875.00 9946636.00
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.00 14602.00 177745.00 317101.00 206690.00 35921.00 807623.00 400662.00 ... 452186.00 25593.00 944146.00 149817.00 NaN 5159.00 24787.00 443783.00 21870.00 11236635.00
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.00 16761.00 198098.00 373920.00 234982.00 40633.00 895027.00 437462.00 ... 511178.00 29667.00 1043371.00 161985.00 NaN 5721.00 27844.00 492424.00 24089.00 12433749.00
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.00 18784.00 224234.00 413503.00 260776.00 45002.00 994316.00 485184.00 ... 575631.00 NaN 1153795.00 184091.00 NaN NaN 30304.00 551963.00 26533.00 13675331.00
5 GSDP - CURRENT PRICES (` in Crore) 2016-17 699307.00 NaN NaN NaN 290140.00 NaN NaN 547396.00 ... 654294.00 NaN NaN NaN NaN NaN NaN 622385.00 29557.00 15251028.00
6 (% Growth over previous year) 2012-13 8.43 13.41 9.56 14.25 12.30 -10.02 17.69 16.63 ... 11.70 12.78 13.65 14.12 NaN 11.13 15.14 13.81 12.23 13.86
7 (% Growth over previous year) 2013-14 12.85 16.38 13.31 12.30 16.44 -5.77 11.47 15.45 ... 12.63 18.14 14.73 13.64 NaN 16.68 14.71 13.43 15.87 12.97
8 (% Growth over previous year) 2014-15 13.40 14.79 11.45 17.92 13.69 13.12 10.82 9.18 ... 13.05 15.92 10.51 8.12 NaN 10.89 12.33 10.96 10.14 10.65
9 (% Growth over previous year) 2015-16 15.85 12.07 13.19 10.59 10.98 10.75 11.09 10.91 ... 12.61 NaN 10.58 13.65 NaN NaN 8.84 12.09 10.15 9.99
10 (% Growth over previous year) 2016-17 14.65 NaN NaN NaN 11.26 NaN NaN 12.82 ... 13.67 NaN NaN NaN NaN NaN NaN 12.76 11.40 11.52

11 rows × 36 columns

In [9]:
df.head()
Out[9]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Telangana Tripura Uttar Pradesh Uttarakhand West Bengal1 Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.0 11063.0 143175.0 247144.0 158074.0 42367.0 615606.0 297539.0 ... 359433.0 19208.0 724049.0 115523.0 NaN 3979.0 18768.0 343767.0 16818.0 8736039.0
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.0 12547.0 156864.0 282368.0 177511.0 38120.0 724495.0 347032.0 ... 401493.0 21663.0 822903.0 131835.0 NaN 4421.0 21609.0 391238.0 18875.0 9946636.0
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.0 14602.0 177745.0 317101.0 206690.0 35921.0 807623.0 400662.0 ... 452186.0 25593.0 944146.0 149817.0 NaN 5159.0 24787.0 443783.0 21870.0 11236635.0
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.0 16761.0 198098.0 373920.0 234982.0 40633.0 895027.0 437462.0 ... 511178.0 29667.0 1043371.0 161985.0 NaN 5721.0 27844.0 492424.0 24089.0 12433749.0
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.0 18784.0 224234.0 413503.0 260776.0 45002.0 994316.0 485184.0 ... 575631.0 NaN 1153795.0 184091.0 NaN NaN 30304.0 551963.0 26533.0 13675331.0

5 rows × 36 columns

In [10]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 36 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Items  Description         11 non-null     object 
 1   Duration                   11 non-null     object 
 2   Andhra Pradesh             11 non-null     float64
 3   Arunachal Pradesh          9 non-null      float64
 4   Assam                      9 non-null      float64
 5   Bihar                      9 non-null      float64
 6   Chhattisgarh               11 non-null     float64
 7   Goa                        9 non-null      float64
 8   Gujarat                    9 non-null      float64
 9   Haryana                    11 non-null     float64
 10  Himachal Pradesh           7 non-null      float64
 11  Jammu & Kashmir            9 non-null      float64
 12  Jharkhand                  9 non-null      float64
 13  Karnataka                  9 non-null      float64
 14  Kerala                     9 non-null      float64
 15  Madhya Pradesh             11 non-null     float64
 16  Maharashtra                7 non-null      float64
 17  Manipur                    7 non-null      float64
 18  Meghalaya                  11 non-null     float64
 19  Mizoram                    7 non-null      float64
 20  Nagaland                   7 non-null      float64
 21  Odisha                     11 non-null     float64
 22  Punjab                     7 non-null      float64
 23  Rajasthan                  7 non-null      float64
 24  Sikkim                     9 non-null      float64
 25  Tamil Nadu                 11 non-null     float64
 26  Telangana                  11 non-null     float64
 27  Tripura                    7 non-null      float64
 28  Uttar Pradesh              9 non-null      float64
 29  Uttarakhand                9 non-null      float64
 30  West Bengal1               0 non-null      float64
 31  Andaman & Nicobar Islands  7 non-null      float64
 32  Chandigarh                 9 non-null      float64
 33  Delhi                      11 non-null     float64
 34  Puducherry                 11 non-null     float64
 35  All_India GDP              11 non-null     float64
dtypes: float64(34), object(2)
memory usage: 3.2+ KB
In [11]:
df.columns
Out[11]:
Index(['Items  Description', 'Duration', 'Andhra Pradesh ',
       'Arunachal Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Goa', 'Gujarat',
       'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir', 'Jharkhand',
       'Karnataka', 'Kerala', 'Madhya Pradesh', 'Maharashtra', 'Manipur',
       'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Punjab', 'Rajasthan',
       'Sikkim', 'Tamil Nadu', 'Telangana', 'Tripura', 'Uttar Pradesh',
       'Uttarakhand', 'West Bengal1', 'Andaman & Nicobar Islands',
       'Chandigarh', 'Delhi', 'Puducherry', 'All_India GDP'],
      dtype='object')
In [12]:
df1 = df[df['Duration'] != '2016-17']
df1
Out[12]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Telangana Tripura Uttar Pradesh Uttarakhand West Bengal1 Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.00 11063.00 143175.00 247144.00 158074.00 42367.00 615606.00 297539.00 ... 359433.00 19208.00 724049.00 115523.00 NaN 3979.00 18768.00 343767.00 16818.00 8736039.00
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.00 12547.00 156864.00 282368.00 177511.00 38120.00 724495.00 347032.00 ... 401493.00 21663.00 822903.00 131835.00 NaN 4421.00 21609.00 391238.00 18875.00 9946636.00
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.00 14602.00 177745.00 317101.00 206690.00 35921.00 807623.00 400662.00 ... 452186.00 25593.00 944146.00 149817.00 NaN 5159.00 24787.00 443783.00 21870.00 11236635.00
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.00 16761.00 198098.00 373920.00 234982.00 40633.00 895027.00 437462.00 ... 511178.00 29667.00 1043371.00 161985.00 NaN 5721.00 27844.00 492424.00 24089.00 12433749.00
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.00 18784.00 224234.00 413503.00 260776.00 45002.00 994316.00 485184.00 ... 575631.00 NaN 1153795.00 184091.00 NaN NaN 30304.00 551963.00 26533.00 13675331.00
6 (% Growth over previous year) 2012-13 8.43 13.41 9.56 14.25 12.30 -10.02 17.69 16.63 ... 11.70 12.78 13.65 14.12 NaN 11.13 15.14 13.81 12.23 13.86
7 (% Growth over previous year) 2013-14 12.85 16.38 13.31 12.30 16.44 -5.77 11.47 15.45 ... 12.63 18.14 14.73 13.64 NaN 16.68 14.71 13.43 15.87 12.97
8 (% Growth over previous year) 2014-15 13.40 14.79 11.45 17.92 13.69 13.12 10.82 9.18 ... 13.05 15.92 10.51 8.12 NaN 10.89 12.33 10.96 10.14 10.65
9 (% Growth over previous year) 2015-16 15.85 12.07 13.19 10.59 10.98 10.75 11.09 10.91 ... 12.61 NaN 10.58 13.65 NaN NaN 8.84 12.09 10.15 9.99

9 rows × 36 columns

In [13]:
df1.isnull().sum()
Out[13]:
Items  Description           0
Duration                     0
Andhra Pradesh               0
Arunachal Pradesh            0
Assam                        0
Bihar                        0
Chhattisgarh                 0
Goa                          0
Gujarat                      0
Haryana                      0
Himachal Pradesh             2
Jammu & Kashmir              0
Jharkhand                    0
Karnataka                    0
Kerala                       0
Madhya Pradesh               0
Maharashtra                  2
Manipur                      2
Meghalaya                    0
Mizoram                      2
Nagaland                     2
Odisha                       0
Punjab                       2
Rajasthan                    2
Sikkim                       0
Tamil Nadu                   0
Telangana                    0
Tripura                      2
Uttar Pradesh                0
Uttarakhand                  0
West Bengal1                 9
Andaman & Nicobar Islands    2
Chandigarh                   0
Delhi                        0
Puducherry                   0
All_India GDP                0
dtype: int64
In [14]:
df1.isnull().all(axis=0)
Out[14]:
Items  Description           False
Duration                     False
Andhra Pradesh               False
Arunachal Pradesh            False
Assam                        False
Bihar                        False
Chhattisgarh                 False
Goa                          False
Gujarat                      False
Haryana                      False
Himachal Pradesh             False
Jammu & Kashmir              False
Jharkhand                    False
Karnataka                    False
Kerala                       False
Madhya Pradesh               False
Maharashtra                  False
Manipur                      False
Meghalaya                    False
Mizoram                      False
Nagaland                     False
Odisha                       False
Punjab                       False
Rajasthan                    False
Sikkim                       False
Tamil Nadu                   False
Telangana                    False
Tripura                      False
Uttar Pradesh                False
Uttarakhand                  False
West Bengal1                  True
Andaman & Nicobar Islands    False
Chandigarh                   False
Delhi                        False
Puducherry                   False
All_India GDP                False
dtype: bool
In [15]:
# removing West Bengal as the whole column is NAN
df1 = df1.drop('West Bengal1', axis = 1)
In [16]:
df1
Out[16]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Tamil Nadu Telangana Tripura Uttar Pradesh Uttarakhand Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.00 11063.00 143175.00 247144.00 158074.00 42367.00 615606.00 297539.00 ... 751485.00 359433.00 19208.00 724049.00 115523.00 3979.00 18768.00 343767.00 16818.00 8736039.00
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.00 12547.00 156864.00 282368.00 177511.00 38120.00 724495.00 347032.00 ... 855481.00 401493.00 21663.00 822903.00 131835.00 4421.00 21609.00 391238.00 18875.00 9946636.00
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.00 14602.00 177745.00 317101.00 206690.00 35921.00 807623.00 400662.00 ... 971090.00 452186.00 25593.00 944146.00 149817.00 5159.00 24787.00 443783.00 21870.00 11236635.00
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.00 16761.00 198098.00 373920.00 234982.00 40633.00 895027.00 437462.00 ... 1092564.00 511178.00 29667.00 1043371.00 161985.00 5721.00 27844.00 492424.00 24089.00 12433749.00
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.00 18784.00 224234.00 413503.00 260776.00 45002.00 994316.00 485184.00 ... 1212668.00 575631.00 NaN 1153795.00 184091.00 NaN 30304.00 551963.00 26533.00 13675331.00
6 (% Growth over previous year) 2012-13 8.43 13.41 9.56 14.25 12.30 -10.02 17.69 16.63 ... 13.84 11.70 12.78 13.65 14.12 11.13 15.14 13.81 12.23 13.86
7 (% Growth over previous year) 2013-14 12.85 16.38 13.31 12.30 16.44 -5.77 11.47 15.45 ... 13.51 12.63 18.14 14.73 13.64 16.68 14.71 13.43 15.87 12.97
8 (% Growth over previous year) 2014-15 13.40 14.79 11.45 17.92 13.69 13.12 10.82 9.18 ... 12.51 13.05 15.92 10.51 8.12 10.89 12.33 10.96 10.14 10.65
9 (% Growth over previous year) 2015-16 15.85 12.07 13.19 10.59 10.98 10.75 11.09 10.91 ... 10.99 12.61 NaN 10.58 13.65 NaN 8.84 12.09 10.15 9.99

9 rows × 35 columns

In [17]:
df1.iloc[6:].isnull().sum() 
# since there are at max. only 1 missing value we can take the average of the other two numbers
Out[17]:
Items  Description           0
Duration                     0
Andhra Pradesh               0
Arunachal Pradesh            0
Assam                        0
Bihar                        0
Chhattisgarh                 0
Goa                          0
Gujarat                      0
Haryana                      0
Himachal Pradesh             1
Jammu & Kashmir              0
Jharkhand                    0
Karnataka                    0
Kerala                       0
Madhya Pradesh               0
Maharashtra                  1
Manipur                      1
Meghalaya                    0
Mizoram                      1
Nagaland                     1
Odisha                       0
Punjab                       1
Rajasthan                    1
Sikkim                       0
Tamil Nadu                   0
Telangana                    0
Tripura                      1
Uttar Pradesh                0
Uttarakhand                  0
Andaman & Nicobar Islands    1
Chandigarh                   0
Delhi                        0
Puducherry                   0
All_India GDP                0
dtype: int64
In [18]:
avg_growth = df1.iloc[6:]
In [19]:
avg_growth #dataframe to find the average growth of states
Out[19]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Tamil Nadu Telangana Tripura Uttar Pradesh Uttarakhand Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
7 (% Growth over previous year) 2013-14 12.85 16.38 13.31 12.30 16.44 -5.77 11.47 15.45 ... 13.51 12.63 18.14 14.73 13.64 16.68 14.71 13.43 15.87 12.97
8 (% Growth over previous year) 2014-15 13.40 14.79 11.45 17.92 13.69 13.12 10.82 9.18 ... 12.51 13.05 15.92 10.51 8.12 10.89 12.33 10.96 10.14 10.65
9 (% Growth over previous year) 2015-16 15.85 12.07 13.19 10.59 10.98 10.75 11.09 10.91 ... 10.99 12.61 NaN 10.58 13.65 NaN 8.84 12.09 10.15 9.99

3 rows × 35 columns

In [20]:
avg_growth.columns
Out[20]:
Index(['Items  Description', 'Duration', 'Andhra Pradesh ',
       'Arunachal Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Goa', 'Gujarat',
       'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir', 'Jharkhand',
       'Karnataka', 'Kerala', 'Madhya Pradesh', 'Maharashtra', 'Manipur',
       'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Punjab', 'Rajasthan',
       'Sikkim', 'Tamil Nadu', 'Telangana', 'Tripura', 'Uttar Pradesh',
       'Uttarakhand', 'Andaman & Nicobar Islands', 'Chandigarh', 'Delhi',
       'Puducherry', 'All_India GDP'],
      dtype='object')
In [21]:
avg_growth.count
Out[21]:
<bound method DataFrame.count of               Items  Description Duration  Andhra Pradesh   Arunachal Pradesh  \
7  (% Growth over previous year)  2013-14            12.85              16.38   
8  (% Growth over previous year)  2014-15            13.40              14.79   
9  (% Growth over previous year)  2015-16            15.85              12.07   

   Assam  Bihar  Chhattisgarh    Goa  Gujarat  Haryana  ...  Tamil Nadu  \
7  13.31  12.30         16.44  -5.77    11.47    15.45  ...       13.51   
8  11.45  17.92         13.69  13.12    10.82     9.18  ...       12.51   
9  13.19  10.59         10.98  10.75    11.09    10.91  ...       10.99   

   Telangana  Tripura  Uttar Pradesh  Uttarakhand  Andaman & Nicobar Islands  \
7      12.63    18.14          14.73        13.64                      16.68   
8      13.05    15.92          10.51         8.12                      10.89   
9      12.61      NaN          10.58        13.65                        NaN   

   Chandigarh  Delhi  Puducherry  All_India GDP  
7       14.71  13.43       15.87          12.97  
8       12.33  10.96       10.14          10.65  
9        8.84  12.09       10.15           9.99  

[3 rows x 35 columns]>
In [22]:
# Taking only the values for the states
average_growth_values = avg_growth[avg_growth.columns[2:34]].mean()
In [23]:
# Sorting the average growth rate values and then making a dataframe for all the states
average_growth_values = average_growth_values.sort_values()
average_growth_rate = average_growth_values.to_frame(name='Average growth rate')
average_growth_rate
Out[23]:
Average growth rate
Goa 6.033333
Meghalaya 6.953333
Odisha 9.836667
Sikkim 10.486667
Jammu & Kashmir 10.900000
Gujarat 11.126667
Punjab 11.185000
Maharashtra 11.260000
Rajasthan 11.320000
Jharkhand 11.500000
Uttarakhand 11.803333
Haryana 11.846667
Uttar Pradesh 11.940000
Chandigarh 11.960000
Puducherry 12.053333
Delhi 12.160000
Himachal Pradesh 12.280000
Tamil Nadu 12.336667
Kerala 12.583333
Madhya Pradesh 12.626667
Assam 12.650000
Telangana 12.763333
Bihar 13.603333
Chhattisgarh 13.703333
Andaman & Nicobar Islands 13.785000
Andhra Pradesh 14.033333
Karnataka 14.120000
Arunachal Pradesh 14.413333
Manipur 14.610000
Nagaland 16.415000
Tripura 17.030000
Mizoram 17.700000
In [24]:
# plotting the average growth rate for all the states
plt.figure(figsize=(12,10), dpi = 300)

sns.barplot(x = average_growth_rate['Average growth rate'], y = average_growth_values.index,palette='viridis')
plt.xlabel('Average Growth Rate', fontsize=10)
plt.ylabel('States', fontsize=12)
plt.title('Average Growth Rate for all the states',fontsize=13)
plt.show()
In [25]:
# top 5 states as per average growth rate

average_growth_rate['Average growth rate'][-5:]
Out[25]:
Arunachal Pradesh    14.413333
Manipur              14.610000
Nagaland             16.415000
Tripura              17.030000
Mizoram              17.700000
Name: Average growth rate, dtype: float64
In [26]:
# top 5 states as per average growth rate for the years 2013-14, 2014-15, 2015-16

avg_growth[['Mizoram','Tripura','Nagaland','Manipur','Arunachal Pradesh']]
Out[26]:
Mizoram Tripura Nagaland Manipur Arunachal Pradesh
7 23.1 18.14 21.98 17.83 16.38
8 12.3 15.92 10.85 11.39 14.79
9 NaN NaN NaN NaN 12.07
In [27]:
#create a dataframe to store the mean and the standard deviation of the growth rate for various states

describe = pd.DataFrame(avg_growth.describe())
describe = describe.T
describe
Out[27]:
count mean std min 25% 50% 75% max
Andhra Pradesh 3.0 14.033333 1.597133 12.85 13.1250 13.400 14.6250 15.85
Arunachal Pradesh 3.0 14.413333 2.179549 12.07 13.4300 14.790 15.5850 16.38
Assam 3.0 12.650000 1.040961 11.45 12.3200 13.190 13.2500 13.31
Bihar 3.0 13.603333 3.834871 10.59 11.4450 12.300 15.1100 17.92
Chhattisgarh 3.0 13.703333 2.730024 10.98 12.3350 13.690 15.0650 16.44
Goa 3.0 6.033333 10.290444 -5.77 2.4900 10.750 11.9350 13.12
Gujarat 3.0 11.126667 0.326548 10.82 10.9550 11.090 11.2800 11.47
Haryana 3.0 11.846667 3.238245 9.18 10.0450 10.910 13.1800 15.45
Himachal Pradesh 2.0 12.280000 3.026417 10.14 11.2100 12.280 13.3500 14.42
Jammu & Kashmir 3.0 10.900000 6.642146 4.70 7.3950 10.090 14.0000 17.91
Jharkhand 3.0 11.500000 3.610374 7.92 9.6800 11.440 13.2900 15.14
Karnataka 3.0 14.120000 3.624969 11.42 12.0600 12.700 15.4700 18.24
Kerala 3.0 12.583333 0.654930 11.85 12.3200 12.790 12.9500 13.11
Madhya Pradesh 3.0 12.626667 2.408492 10.11 11.4850 12.860 13.8850 14.91
Maharashtra 2.0 11.260000 3.507250 8.78 10.0200 11.260 12.5000 13.74
Manipur 2.0 14.610000 4.553768 11.39 13.0000 14.610 16.2200 17.83
Meghalaya 3.0 6.953333 2.401548 4.87 5.6400 6.410 7.9950 9.58
Mizoram 2.0 17.700000 7.636753 12.30 15.0000 17.700 20.4000 23.10
Nagaland 2.0 16.415000 7.870098 10.85 13.6325 16.415 19.1975 21.98
Odisha 3.0 9.836667 3.411412 6.19 8.2800 10.370 11.6600 12.95
Punjab 2.0 11.185000 1.746554 9.95 10.5675 11.185 11.8025 12.42
Rajasthan 2.0 11.320000 0.070711 11.27 11.2950 11.320 11.3450 11.37
Sikkim 3.0 10.486667 1.622108 9.39 9.5550 9.720 11.0350 12.35
Tamil Nadu 3.0 12.336667 1.268910 10.99 11.7500 12.510 13.0100 13.51
Telangana 3.0 12.763333 0.248462 12.61 12.6200 12.630 12.8400 13.05
Tripura 2.0 17.030000 1.569777 15.92 16.4750 17.030 17.5850 18.14
Uttar Pradesh 3.0 11.940000 2.416464 10.51 10.5450 10.580 12.6550 14.73
Uttarakhand 3.0 11.803333 3.189864 8.12 10.8800 13.640 13.6450 13.65
Andaman & Nicobar Islands 2.0 13.785000 4.094148 10.89 12.3375 13.785 15.2325 16.68
Chandigarh 3.0 11.960000 2.952440 8.84 10.5850 12.330 13.5200 14.71
Delhi 3.0 12.160000 1.236487 10.96 11.5250 12.090 12.7600 13.43
Puducherry 3.0 12.053333 3.305334 10.14 10.1450 10.150 13.0100 15.87
All_India GDP 3.0 11.203333 1.565162 9.99 10.3200 10.650 11.8100 12.97
In [28]:
# states having mean growth rate greater than 12 and standard deviation less than 2

describe[(describe['mean']>12) & (describe['std']<2)]
Out[28]:
count mean std min 25% 50% 75% max
Andhra Pradesh 3.0 14.033333 1.597133 12.85 13.125 13.40 14.625 15.85
Assam 3.0 12.650000 1.040961 11.45 12.320 13.19 13.250 13.31
Kerala 3.0 12.583333 0.654930 11.85 12.320 12.79 12.950 13.11
Tamil Nadu 3.0 12.336667 1.268910 10.99 11.750 12.51 13.010 13.51
Telangana 3.0 12.763333 0.248462 12.61 12.620 12.63 12.840 13.05
Tripura 2.0 17.030000 1.569777 15.92 16.475 17.03 17.585 18.14
Delhi 3.0 12.160000 1.236487 10.96 11.525 12.09 12.760 13.43
In [29]:
# states having mean growth rate greater than 13 and standard deviation greater than 2

describe[(describe['mean']<12) & (describe['std']>2)]
Out[29]:
count mean std min 25% 50% 75% max
Goa 3.0 6.033333 10.290444 -5.77 2.490 10.75 11.935 13.12
Haryana 3.0 11.846667 3.238245 9.18 10.045 10.91 13.180 15.45
Jammu & Kashmir 3.0 10.900000 6.642146 4.70 7.395 10.09 14.000 17.91
Jharkhand 3.0 11.500000 3.610374 7.92 9.680 11.44 13.290 15.14
Maharashtra 2.0 11.260000 3.507250 8.78 10.020 11.26 12.500 13.74
Meghalaya 3.0 6.953333 2.401548 4.87 5.640 6.41 7.995 9.58
Odisha 3.0 9.836667 3.411412 6.19 8.280 10.37 11.660 12.95
Uttar Pradesh 3.0 11.940000 2.416464 10.51 10.545 10.58 12.655 14.73
Uttarakhand 3.0 11.803333 3.189864 8.12 10.880 13.64 13.645 13.65
Chandigarh 3.0 11.960000 2.952440 8.84 10.585 12.33 13.520 14.71
In [30]:
df1.head()
Out[30]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Tamil Nadu Telangana Tripura Uttar Pradesh Uttarakhand Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
0 GSDP - CURRENT PRICES (` in Crore) 2011-12 379402.0 11063.0 143175.0 247144.0 158074.0 42367.0 615606.0 297539.0 ... 751485.0 359433.0 19208.0 724049.0 115523.0 3979.0 18768.0 343767.0 16818.0 8736039.0
1 GSDP - CURRENT PRICES (` in Crore) 2012-13 411404.0 12547.0 156864.0 282368.0 177511.0 38120.0 724495.0 347032.0 ... 855481.0 401493.0 21663.0 822903.0 131835.0 4421.0 21609.0 391238.0 18875.0 9946636.0
2 GSDP - CURRENT PRICES (` in Crore) 2013-14 464272.0 14602.0 177745.0 317101.0 206690.0 35921.0 807623.0 400662.0 ... 971090.0 452186.0 25593.0 944146.0 149817.0 5159.0 24787.0 443783.0 21870.0 11236635.0
3 GSDP - CURRENT PRICES (` in Crore) 2014-15 526468.0 16761.0 198098.0 373920.0 234982.0 40633.0 895027.0 437462.0 ... 1092564.0 511178.0 29667.0 1043371.0 161985.0 5721.0 27844.0 492424.0 24089.0 12433749.0
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.0 18784.0 224234.0 413503.0 260776.0 45002.0 994316.0 485184.0 ... 1212668.0 575631.0 NaN 1153795.0 184091.0 NaN 30304.0 551963.0 26533.0 13675331.0

5 rows × 35 columns

In [31]:
# filtering out the data for the year 2015-16 and storing it in a dataframe
total_GDP_15_16 = df1[(df1['Items  Description'] == 'GSDP - CURRENT PRICES (` in Crore)') & (df1['Duration'] == '2015-16')]
total_GDP_15_16
Out[31]:
Items Description Duration Andhra Pradesh Arunachal Pradesh Assam Bihar Chhattisgarh Goa Gujarat Haryana ... Tamil Nadu Telangana Tripura Uttar Pradesh Uttarakhand Andaman & Nicobar Islands Chandigarh Delhi Puducherry All_India GDP
4 GSDP - CURRENT PRICES (` in Crore) 2015-16 609934.0 18784.0 224234.0 413503.0 260776.0 45002.0 994316.0 485184.0 ... 1212668.0 575631.0 NaN 1153795.0 184091.0 NaN 30304.0 551963.0 26533.0 13675331.0

1 rows × 35 columns

In [32]:
# carrying out necessary transformation to make the data ready for plotting

total_GDP_15_16_states = total_GDP_15_16[total_GDP_15_16.columns[2:34]].transpose()
total_GDP_15_16_states = total_GDP_15_16_states.rename(columns={4: 'Total GDP of States 2015-16'})
total_GDP_15_16_states = total_GDP_15_16_states.dropna()
total_GDP_15_16_states = total_GDP_15_16_states.sort_values('Total GDP of States 2015-16',ascending=True)
total_GDP_15_16_states
Out[32]:
Total GDP of States 2015-16
Sikkim 16637.0
Arunachal Pradesh 18784.0
Puducherry 26533.0
Meghalaya 26745.0
Chandigarh 30304.0
Goa 45002.0
Jammu & Kashmir 118387.0
Uttarakhand 184091.0
Assam 224234.0
Jharkhand 241955.0
Chhattisgarh 260776.0
Odisha 341887.0
Bihar 413503.0
Haryana 485184.0
Madhya Pradesh 543975.0
Delhi 551963.0
Telangana 575631.0
Kerala 588337.0
Andhra Pradesh 609934.0
Gujarat 994316.0
Karnataka 1027068.0
Uttar Pradesh 1153795.0
Tamil Nadu 1212668.0
In [33]:
plt.figure(figsize=(10,8), dpi = 600)

sns.barplot(x = total_GDP_15_16_states['Total GDP of States 2015-16'], y = total_GDP_15_16_states.index,palette='plasma')
plt.xlabel('Total GDP of States for 2015-16', fontsize=15)
plt.ylabel('States', fontsize=12)
plt.title('Total GDP of States 2015-16 for all the states',fontsize=12)
plt.show()
In [34]:
top_5_eco = total_GDP_15_16_states[-5:]
top_5_eco
Out[34]:
Total GDP of States 2015-16
Andhra Pradesh 609934.0
Gujarat 994316.0
Karnataka 1027068.0
Uttar Pradesh 1153795.0
Tamil Nadu 1212668.0
In [35]:
bottom_5_eco = total_GDP_15_16_states[:5]
bottom_5_eco
Out[35]:
Total GDP of States 2015-16
Sikkim 16637.0
Arunachal Pradesh 18784.0
Puducherry 26533.0
Meghalaya 26745.0
Chandigarh 30304.0
In [36]:
df1=pd.read_excel("D:\Application of software\GDP.xlsx")
In [37]:
df1
Out[37]:
States Year_2020_21
0 Andhra Pradesh 105880
1 Arunachal Pradesh 106903
2 Assam 61304
3 Bihar 28127
4 Chhattisgarh 73259
5 Goa 298527
6 Gujarat 160321
7 Haryana 155756
8 Himachal Pradesh 133079
9 Jharkhand 51365
10 Karnataka 149030
11 Kerala 132700
12 Madhya Pradesh 56320
13 Maharashtra 127970
14 Manipur 46968
15 Meghalaya 56679
16 Mizoram 118555
17 Nagaland 72215
18 Odisha 70877
19 Punjab 111662
20 Rajasthan 73140
21 Sikkim 239928
22 Tamil Nadu 143528
23 Telangana 139640
24 Tripura 80512
25 Uttar Pradesh 39298
26 Uttarakhand 137987
27 West Bengal 63562
28 Andaman & Nicobar Islands 152859
29 Chandigarh 199845
30 Delhi 234569
31 Jammu & Kashmir 65172
32 Puducherry 144866
In [38]:
os.chdir('E:\states')
In [39]:
ap=pd.read_csv("E:/states/NAD-Andhra_Pradesh-GSVA_cur_2016-17.csv")
In [40]:
ap
Out[40]:
S.No. Item 2011-12 2012-13 2013-14 2014-15 2015-16 2016-17
0 1 Agriculture, forestry and fishing 9400805 11186428 12895568 14819416 17326726 20386004
1 1.1 Crops 5204052 6123041 7114707 7893514 8644285 9717089
2 1.2 Livestock 2758776 3358438 3643026 4309078 5155487 5979648
3 1.3 Forestry and logging 250314 253029 280493 346160 340550 335487
4 1.4 Fishing and aquaculture 1187663 1451920 1857342 2270664 3186404 4353780
5 2 Mining and quarrying 1416194 1459027 1419200 1484300 1633100 1757565
6 Total Primary 10816999 12645455 14314768 16303716 18959826 22143569
7 3 Manufacturing 5070622 4237657 4242337 4672266 5078685 5740816
8 4 Electricity, gas, water supply & other utility... 1076517 713029 1014130 1151729 1251910 1298947
9 5 Construction 3702645 3759004 4065131 4664889 4986189 5467732
10 Total Secondary 9849784 8709690 9321598 10488884 11316784 12507496
11 6 Trade, repair, hotels and restaurants 2673600 3179200 3784900 4233400 4577700 5174600
12 6.1 Trade & repair services 2322700 2775600 3290800 3716000 3964000 4382200
13 6.2 Hotels & restaurants 350900 403600 494100 517400 613700 792400
14 7 Transport, storage, communication & services r... 3251379 3778632 4365811 5076984 5806226 6766317
15 7.1 Railways 264279 303702 334468 424228 436596 445372
16 7.2 Road transport 1824800 2200400 2516200 2816000 3217400 3798300
17 7.3 Water transport 95100 90400 60600 94200 110500 123600
18 7.4 Air transport 4500 10600 5100 14900 25000 28000
19 7.5 Services incidental to transport 501800 535500 634700 780200 904200 1020800
20 7.6 Storage 20700 16600 18700 18700 19000 20600
21 7.7 Communication & services related to broadcasting 540200 621430 796043 928756 1093530 1329645
22 8 Financial services 1425608 1584297 1710463 1900863 2125508 2415390
23 9 Real estate, ownership of dwelling & professio... 2833805 3434098 3897345 4405409 5092310 5942431
24 10 Public administration 1393752 1552379 1738971 2200897 2594904 3039676
25 11 Other services 2730376 3063920 3538298 4215389 5248604 6180240
26 Total Tertiary 14308520 16592526 19035788 22032942 25445252 29518654
27 12 TOTAL GSVA at basic prices 34975303 37947671 42672154 48825542 55721862 64169719
28 13 Taxes on Products 4243900 4656500 5263500 5512100 6719300 7236500
29 14 Subsidies on products 1279000 1463800 1508500 1690800 1447800 1475500
30 15 Gross State Domestic Product 37940203 41140371 46427154 52646842 60993362 69930719
31 16 Population ('00) 492750 495660 498570 501510 504460 507430
32 17 Per Capita GSDP (Rs.) 76997 83001 93121 104977 120908 137814
In [41]:
Andhra_Pradesh=pd.read_csv("E:/states/NAD-Arunachal_Pradesh-GSVA_cur_2015-16.csv")
In [42]:
Arunachal_Pradesh=pd.read_csv("E:/states/NAD-Assam-GSVA_cur_2015-16.csv")
In [43]:
Assam=pd.read_csv("E:/states/NAD-Assam-GSVA_cur_2015-16.csv")
In [44]:
Bihar=pd.read_csv("E:/states/NAD-Bihar-GSVA_cur_2015-16.csv")
In [45]:
Chhattisgarh=pd.read_csv("E:/states/NAD-Chhattisgarh-GSVA_cur_2016-17.csv")
In [46]:
Goa=pd.read_csv("E:/states/NAD-Goa-GSVA_cur_2015-16.csv")
In [47]:
Gujarat=pd.read_csv("E:/states/NAD-Gujarat-GSVA_cur_2015-16.csv")
In [48]:
Haryana=pd.read_csv("E:/states/NAD-Haryana-GSVA_cur_2016-17.csv")
In [49]:
Himachal_Pradesh=pd.read_csv("E:/states/NAD-Himachal_Pradesh-GSVA_cur_2014-15.csv")
In [50]:
Jharkhand=pd.read_csv("E:/states/NAD-Jharkhand-GSVA_cur_2015-16.csv")
In [51]:
Karnataka=pd.read_csv("E:/states/NAD-Karnataka-GSVA_cur_2015-16.csv")
In [52]:
Kerala=pd.read_csv("E:/states/NAD-Kerala-GSVA_cur_2015-16.csv")
In [53]:
Madhya_Pradesh=pd.read_csv("E:/states/NAD-Madhya_Pradesh-GSVA_cur_2016-17.csv")
In [54]:
Maharashtra=pd.read_csv("E:/states/NAD-Maharashtra-GSVA_cur_2014-15.csv")
In [55]:
Meghalaya=pd.read_csv("E:/states/NAD-Meghalaya-GSVA_cur_2016-17.csv")
In [57]:
Mizoram=pd.read_csv("E:/states/NAD-Mizoram-GSVA_cur_2014-15.csv")
In [58]:
Nagaland=pd.read_csv("E:/states/NAD-Nagaland-GSVA_cur_2014-15.csv")
In [59]:
Odisha=pd.read_csv("E:/states/NAD-Odisha-GSVA_cur_2016-17.csv")
In [60]:
Punjab=pd.read_csv("E:/states/NAD-Punjab-GSVA_cur_2014-15.csv")
In [61]:
Rajasthan=pd.read_csv("E:/states/NAD-Rajasthan-GSVA_cur_2014-15.csv")
In [62]:
Sikkim=pd.read_csv("E:/states/NAD-Sikkim-GSVA_cur_2015-16.csv")
In [63]:
Tamil_Nadu=pd.read_csv("E:/states/NAD-Tamil_Nadu-GSVA_cur_2016-17.csv")
In [64]:
Telangana=pd.read_csv("E:/states/NAD-Telangana-GSVA_cur_2016-17.csv")
In [65]:
Uttar_Pradesh=pd.read_csv("E:/states/NAD-Uttar_Pradesh-GSVA_cur_2015-16.csv")
In [66]:
Uttarakhand=pd.read_csv("E:/states/NAD-Uttarakhand-GSVA_cur_2015-16.csv")
In [67]:
Manipur=pd.read_csv("E:/states/NAD-Manipur-GSVA_cur_2014-15.csv")
---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
Cell In[67], line 1
----> 1 Manipur=pd.read_csv("E:/states/NAD-Manipur-GSVA_cur_2014-15.csv")

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:912, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
    899 kwds_defaults = _refine_defaults_read(
    900     dialect,
    901     delimiter,
   (...)
    908     dtype_backend=dtype_backend,
    909 )
    910 kwds.update(kwds_defaults)
--> 912 return _read(filepath_or_buffer, kwds)

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:577, in _read(filepath_or_buffer, kwds)
    574 _validate_names(kwds.get("names", None))
    576 # Create the parser.
--> 577 parser = TextFileReader(filepath_or_buffer, **kwds)
    579 if chunksize or iterator:
    580     return parser

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:1407, in TextFileReader.__init__(self, f, engine, **kwds)
   1404     self.options["has_index_names"] = kwds["has_index_names"]
   1406 self.handles: IOHandles | None = None
-> 1407 self._engine = self._make_engine(f, self.engine)

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:1679, in TextFileReader._make_engine(self, f, engine)
   1676     raise ValueError(msg)
   1678 try:
-> 1679     return mapping[engine](f, **self.options)
   1680 except Exception:
   1681     if self.handles is not None:

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\c_parser_wrapper.py:93, in CParserWrapper.__init__(self, src, **kwds)
     90 if kwds["dtype_backend"] == "pyarrow":
     91     # Fail here loudly instead of in cython after reading
     92     import_optional_dependency("pyarrow")
---> 93 self._reader = parsers.TextReader(src, **kwds)
     95 self.unnamed_cols = self._reader.unnamed_cols
     97 # error: Cannot determine type of 'names'

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:550, in pandas._libs.parsers.TextReader.__cinit__()

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:639, in pandas._libs.parsers.TextReader._get_header()

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:850, in pandas._libs.parsers.TextReader._tokenize_rows()

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:861, in pandas._libs.parsers.TextReader._check_tokenize_status()

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:2021, in pandas._libs.parsers.raise_parser_error()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 337: invalid start byte
In [68]:
Tripura=pd.read_csv("E:/states/NAD-Tripura-GSVA_cur_2014-15.csv")
In [69]:
andhra_pradesh = Andhra_Pradesh[['S.No.','Item', '2014-15']]
andhra_pradesh = andhra_pradesh.rename(columns={'2014-15': 'Andhra_Pradesh'})

arunachal_pradesh = Arunachal_Pradesh[['S.No.','Item', '2014-15']]
arunachal_pradesh = arunachal_pradesh.rename(columns={'2014-15': 'Arunachal_Pradesh'})

assam = Assam[['S.No.','Item', '2014-15']]
assam = assam.rename(columns={'2014-15': 'Assam'})

bihar = Bihar[['S.No.','Item', '2014-15']]
bihar = bihar.rename(columns={'2014-15': 'Bihar'})

chhattisgarh = Chhattisgarh[['S.No.','Item', '2014-15']]
chhattisgarh = chhattisgarh.rename(columns={'2014-15': 'Chhattisgarh'})

goa = Goa[['S.No.','Item', '2014-15']]
goa = goa.rename(columns={'2014-15': 'Goa'})

gujarat = Gujarat[['S.No.','Item', '2014-15']]
gujarat = gujarat.rename(columns={'2014-15': 'Gujarat'})

haryana = Haryana[['S.No.','Item', '2014-15']]
haryana = haryana.rename(columns={'2014-15': 'Haryana'})

himachal_Pradesh = Himachal_Pradesh[['S.No.','Item', '2014-15']]
himachal_Pradesh = himachal_Pradesh.rename(columns={'2014-15': 'Himachal_Pradesh'})

jharkhand = Jharkhand[['S.No.','Item', '2014-15']]
jharkhand = jharkhand.rename(columns={'2014-15': 'Jharkhand'})

karnataka = Karnataka[['S.No.','Item', '2014-15']]
karnataka = karnataka.rename(columns={'2014-15': 'Karnataka'})

kerala = Kerala[['S.No.','Item', '2014-15']]
kerala = kerala.rename(columns={'2014-15': 'Kerala'})

madhya_pradesh = Madhya_Pradesh[['S.No.','Item', '2014-15']]
madhya_pradesh = madhya_pradesh.rename(columns={'2014-15': 'Madhya_Pradesh'})

maharashtra = Maharashtra[['S.No.','Item', '2014-15']]
maharashtra = maharashtra.rename(columns={'2014-15': 'Maharashtra'})



meghalaya = Meghalaya[['S.No.','Item', '2014-15']]
meghalaya = meghalaya.rename(columns={'2014-15': 'Meghalaya'})

mizoram = Mizoram[['S.No.','Item', '2014-15']]
mizoram = mizoram.rename(columns={'2014-15': 'Mizoram'})

nagaland = Nagaland[['S.No.','Item', '2014-15']]
nagaland = nagaland.rename(columns={'2014-15': 'Nagaland'})

odisha = Odisha[['S.No.','Item', '2014-15']]
odisha = odisha.rename(columns={'2014-15': 'Odisha'})

punjab = Punjab[['S.No.','Item', '2014-15']]
punjab = punjab.rename(columns={'2014-15': 'Punjab'})

rajasthan = Rajasthan[['S.No.','Item', '2014-15']]
rajasthan = rajasthan.rename(columns={'2014-15': 'Rajasthan'})

sikkim = Sikkim[['S.No.','Item', '2014-15']]
sikkim = sikkim.rename(columns={'2014-15': 'Sikkim'})

tamil_nadu = Tamil_Nadu[['S.No.','Item', '2014-15']]
tamil_nadu = tamil_nadu.rename(columns={'2014-15': 'Tamil_Nadu'})

telangana = Telangana[['S.No.','Item', '2014-15']]
telangana = telangana.rename(columns={'2014-15': 'Telangana'})

tripura = Tripura[['S.No.','Item', '2014-15']]
tripura = tripura.rename(columns={'2014-15': 'Tripura'})

uttar_pradesh = Uttar_Pradesh[['S.No.','Item', '2014-15']]
uttar_pradesh = uttar_pradesh.rename(columns={'2014-15': 'Uttar_Pradesh'})

uttarakhand = Uttarakhand[['S.No.','Item', '2014-15']]
uttarakhand = uttarakhand.rename(columns={'2014-15': 'Uttarakhand'})
In [70]:
dfs = [andhra_pradesh,arunachal_pradesh, assam, bihar, chhattisgarh, goa, gujarat, haryana,himachal_Pradesh,
       jharkhand, karnataka,kerala,madhya_pradesh, maharashtra,meghalaya,mizoram, nagaland,odisha,
       punjab,rajasthan,sikkim,tamil_nadu,telangana,tripura,uttarakhand, uttar_pradesh]


from functools import reduce
df_final = reduce(lambda left,right: pd.merge(left,right,how ='left',on=['S.No.', 'Item']), dfs)
In [71]:
df_final.columns
Out[71]:
Index(['S.No.', 'Item', 'Andhra_Pradesh', 'Arunachal_Pradesh', 'Assam',
       'Bihar', 'Chhattisgarh', 'Goa', 'Gujarat', 'Haryana',
       'Himachal_Pradesh', 'Jharkhand', 'Karnataka', 'Kerala',
       'Madhya_Pradesh', 'Maharashtra', 'Meghalaya', 'Mizoram', 'Nagaland',
       'Odisha', 'Punjab', 'Rajasthan', 'Sikkim', 'Tamil_Nadu', 'Telangana',
       'Tripura', 'Uttarakhand', 'Uttar_Pradesh'],
      dtype='object')
In [72]:
# Renaming some of the state names for merging data at a later stage

df_final = df_final.rename(columns={'Andhra_Pradesh':'Andhra Pradesh', 'Arunachal_Pradesh':'Arunachal Pradesh',
                                   'Himachal_Pradesh':'Himachal Pradesh','Madhya_Pradesh':'Madhya Pradesh',
                                   'Tamil_Nadu':'Tamil Nadu','Uttar_Pradesh':'Uttar Pradesh',
                                   'Chhattisgarh':'Chhatisgarh','Uttarakhand':'Uttrakhand'})
In [73]:
# Final dataframe having the data for all the states for all the sectors and subsectors of the economy

df_final
Out[73]:
S.No. Item Andhra Pradesh Arunachal Pradesh Assam Bihar Chhatisgarh Goa Gujarat Haryana ... Nagaland Odisha Punjab Rajasthan Sikkim Tamil Nadu Telangana Tripura Uttrakhand Uttar Pradesh
0 1 Agriculture, forestry and fishing 686117 3855548 3855548 7951890 3948847 308507 13769969.00 8015238.0 ... 607897 6422978 9285716 15044394 137447 13064238.0 7591501 799825.0 1601423.0 25097754
1 1.1 Crops 415520 2890544 2890544 4688237 2613371 140421 9671086.00 4636731.0 ... 375825 4382636 5690972 7673441 114976 7297820.0 4162493 397591.0 866146.0 16215212
2 1.2 Livestock 38387 173478 173478 2060296 352208 30141 2698910.00 2916173.0 ... 123800 788243 2638842 5356257 17338 4693361.0 2951299 88176.0 391188.0 7096876
3 1.3 Forestry and logging 224017 261987 261987 550132 597785 15744 761616.00 352254.0 ... 99802 791463 848245 1956660 4529 392705.0 210741 145096.0 339293.0 1404936
4 1.4 Fishing and aquaculture 8193 529539 529539 653224 385483 122201 638357.00 110080.0 ... 8470 460636 107657 58036 604 680352.0 266968 168961.0 4796.0 380730
5 2 Mining and quarrying 30842 1471149 1471149 68107 2451970 3622 2117218.00 25186.0 ... 8280 2586328 10354 4069385 1329 265536.0 1541853 142391.0 244549.0 901501
6 Total Primary 716959 5326697 5326697 8019997 6400817 312129 15887187.00 8040424.0 ... 616178 9009306 9296070 19113780 138776 13329774.0 9133354 942216.0 1845972.0 25999255
7 3 Manufacturing 26120 2002936 2002936 2189965 4370593 1177608 24087538.00 7756921.0 ... 18346 5754229 4790341 6552580 550697 18914794.0 6353711 228625.0 5866252.0 12261649
8 4 Electricity, gas, water supply & other utility... 113527 296587 296587 345168 1198438 204110 3409983.00 1101919.0 ... 37944 833067 911611 1122888 212499 1710379.0 716266 77870.0 433880.0 2030625
9 5 Construction 147842 1733568 1733568 3449763 2669855 165819 5526017.00 3702571.0 ... 156072 2402396 2202962 5353326 82058 12216718.0 2854024 177899.0 1342733.0 11256450
10 Total Secondary 287489 4033091 4033091 5984896 8238886 1547536 33023538.00 12561411.0 ... 212361 8989693 7904914 13028794 845253 32841892.0 9924001 484393.0 7642865.0 25548724
11 6 Trade, repair, hotels and restaurants 60421 2987155 2987155 7448373 1535571 380927 10178713.00 4986319.0 ... 140781 3149555 4419919 7297290 70568 12895842.0 6494607 390423.0 1743106.0 9437243
12 6.1 Trade & repair services 56796 2876251 2876251 7081391 1414164 343492 10178713.00 4817784.0 ... 134174 2886789 4201252 6942748 64624 11252588.0 5724128 390423.0 1534073.0 8476139
13 6.2 Hotels & restaurants 3625 110904 110904 366982 121407 37434 NaN 168535.0 ... 6607 262766 218667 354543 5945 1643253.0 770479 NaN 209033.0 961104
14 7 Transport, storage, communication & services r... 35203 1194568 1194568 3147173 871770 189656 4555910.00 2560623.0 ... 77521 2034016 1951809 3814461 47347 7188320.0 3604741 155956.0 1066693.0 7404509
15 7.1 Railways 59 252509 252509 462413 159176 15649 511593.00 423873.0 ... 336 341494 233389 464638 0 468553.0 199686 305.0 21295.0 1618742
16 7.2 Road transport 15467 507668 507668 1572288 386628 46171 NaN 1452364.0 ... 34548 973144 928575 2121206 35283 3660994.0 2055658 NaN NaN 3645747
17 7.3 Water transport 0 4502 4502 2228 0 17820 NaN NaN ... 600 50349 0 0 0 70414.0 0 NaN NaN 681
18 7.4 Air transport 0 26223 26223 13599 9507 46359 NaN NaN ... 4153 15354 4473 13469 0 180836.0 120691 NaN 3889.0 36582
19 7.5 Services incidental to transport 109 35739 35739 166600 5232 19272 NaN 190269.0 ... 0 117469 48124 47609 0 NaN 454909 NaN -76.0 16323
20 7.6 Storage 0 10308 10308 10618 16675 357 57634.00 14459.0 ... 89 22675 76429 16584 0 39834.0 19805 254.0 660.0 171696
21 7.7 Communication & services related to broadcasting 19568 357619 357619 919427 294552 44028 1242520.00 479658.0 ... 37794 513531 660819 1150955 12064 1903283.0 753992 66676.0 733778.0 1914737
22 8 Financial services 25207 543651 543651 1178022 739057 233618 4606644.00 1671486.0 ... 60393 1065147 2057520 1827413 21079 5598498.0 3023729 86094.0 385030.0 3392275
23 9 Real estate, ownership of dwelling & professio... 48418 1412466 1412466 3740641 2462166 407099 5179502.00 6970183.0 ... 159651 2348714 3142786 6451997 75330 16830213.0 9478839 190704.0 831307.0 14548185
24 10 Public administration 243867 1373611 1373611 2078171 867982 346486 2576195.00 1036377.0 ... 295424 1318221 1842730 2460364 119514 3400800.0 1711265 338244.0 579409.0 6152124
25 11 Other services 218728 1795658 1795658 4587589 1112232 180431 3123413.00 2001581.0 ... 259186 2340603 3303041 4164287 149265 7430115.0 4158229 323287.0 982430.0 5034623
26 Total Tertiary 631844 9307109 9307109 22179969 7588778 1738217 30220377.00 19226568.0 ... 992956 12256258 16717805 26015812 483103 53343788.0 28471410 1484709.0 5587975.0 45968959
27 12 TOTAL GSVA at basic prices 1636292 18666897 18666897 36184863 22228481 3597882 79131102.00 39828404.0 ... 1821495 30255256 33918789 58158386 1467133 99515453.0 47528765 2911319.0 15076812.0 97516938
28 13 Taxes on Products 70099 1725309 1725309 3213546 2601791 527279 12353171.04 4985670.0 ... 57674 3151184 3794100 5394503 72200 12507325.0 4425700 149345.0 1434856.0 10107396
29 14 Subsidies on products 30272 582406 582406 2006421 1332092 61854 1981546.00 1067867.0 ... 37745 1209349 911800 2333442 18400 2766405.0 836700 94002.0 313139.0 3287219
30 15 Gross State Domestic Product 1676119 19809800 19809800 37391988 23498180 4063307 89502727.00 43746207.0 ... 1841424 32197092 36801089 61219447 1520933 109256373.0 51117765 2966662.0 16198529.0 104337115
31 16 Population ('00) 14870 326780 326780 1101240 270530 14950 633590.00 266620.0 ... 20550 435220 290673 721610 6330 745760.0 367660 38350.0 105820.0 2109940
32 17 Per Capita GSDP (Rs.) 112718 60621 60621 33954 86860 271793 141263.00 164077.0 ... 89607 73979 126606 84837 240274 146503.0 139035 77358.0 153076.0 49450

33 rows × 28 columns

In [74]:
gdp_per_capita = df_final.iloc[32][2:].sort_values()
gdp_per_capita = gdp_per_capita.to_frame(name = 'GDP per capita')
gdp_per_capita
Out[74]:
GDP per capita
Bihar 33954
Uttar Pradesh 49450
Arunachal Pradesh 60621
Assam 60621
Jharkhand 62091
Madhya Pradesh 62989
Odisha 73979
Meghalaya 76228.0
Tripura 77358.0
Rajasthan 84837
Chhatisgarh 86860
Nagaland 89607
Mizoram 97687
Andhra Pradesh 112718
Punjab 126606
Telangana 139035
Gujarat 141263.0
Karnataka 145141
Tamil Nadu 146503.0
Himachal Pradesh 147330
Maharashtra 152853
Uttrakhand 153076.0
Kerala 154778.0
Haryana 164077.0
Sikkim 240274
Goa 271793
In [75]:
plt.figure(figsize=(12,8), dpi=600)                             

sns.barplot(x = gdp_per_capita['GDP per capita'], y =gdp_per_capita.index, palette='Reds' )
plt.xlabel('GDP per capita', fontsize=12)
plt.ylabel('States', fontsize=12)
plt.title('GDP per capita vs States',fontsize=12)
plt.show()
In [76]:
top_5_gdp_per_capita = gdp_per_capita[-5:]
top_5_gdp_per_capita
Out[76]:
GDP per capita
Uttrakhand 153076.0
Kerala 154778.0
Haryana 164077.0
Sikkim 240274
Goa 271793
In [77]:
bottom_5_gdp_per_capita = gdp_per_capita[:5]
bottom_5_gdp_per_capita
Out[77]:
GDP per capita
Bihar 33954
Uttar Pradesh 49450
Arunachal Pradesh 60621
Assam 60621
Jharkhand 62091
In [78]:
ratio = gdp_per_capita['GDP per capita'].max()/gdp_per_capita['GDP per capita'].min()
print('The Ratio of highest per capita GDP to the lowest per capita GDP is: ',ratio)
The Ratio of highest per capita GDP to the lowest per capita GDP is:  8.004741709371503
In [79]:
# Identifying the Primary, Secondary and the tertiary sectors and concating these to form a dataframe

primary = df_final[df_final['Item']=='Primary']
secondary = df_final[df_final['Item']=='Secondary']
tertiary = df_final[df_final['Item']=='Tertiary']
gdp = df_final[df_final['Item']=='Gross State Domestic Product']

pst = pd.concat([primary, secondary,tertiary,gdp], axis = 0).reset_index()
pst =  pst.drop(['index','S.No.'], axis = 1).set_index('Item')
In [80]:
pst
Out[80]:
Andhra Pradesh Arunachal Pradesh Assam Bihar Chhatisgarh Goa Gujarat Haryana Himachal Pradesh Jharkhand ... Nagaland Odisha Punjab Rajasthan Sikkim Tamil Nadu Telangana Tripura Uttrakhand Uttar Pradesh
Item
Primary 716959 5326697 5326697 8019997 6400817 312129 15887187.0 8040424.0 1548366 5248354 ... 616178 9009306 9296070 19113780 138776 13329774.0 9133354 942216.0 1845972.0 25999255
Secondary 287489 4033091 4033091 5984896 8238886 1547536 33023538.0 12561411.0 4119162 6241471 ... 212361 8989693 7904914 13028794 845253 32841892.0 9924001 484393.0 7642865.0 25548724
Tertiary 631844 9307109 9307109 22179969 7588778 1738217 30220377.0 19226568.0 4133326 8133341 ... 992956 12256258 16717805 26015812 483103 53343788.0 28471410 1484709.0 5587975.0 45968959
Gross State Domestic Product 1676119 19809800 19809800 37391988 23498180 4063307 89502727.0 43746207.0 10436879 21710718 ... 1841424 32197092 36801089 61219447 1520933 109256373.0 51117765 2966662.0 16198529.0 104337115

4 rows × 26 columns

In [81]:
# calculating the percentage contribution of each sector to the Gross State Domestic Product for each state

pst.loc['primary_percentage'] = pst.loc['Primary'] / pst.loc['Gross State Domestic Product'] * 100
pst.loc['secondary_percentage'] = pst.loc['Secondary'] / pst.loc['Gross State Domestic Product'] * 100
pst.loc['tertiary_percentage'] = pst.loc['Tertiary'] / pst.loc['Gross State Domestic Product'] * 100
In [82]:
pst
Out[82]:
Andhra Pradesh Arunachal Pradesh Assam Bihar Chhatisgarh Goa Gujarat Haryana Himachal Pradesh Jharkhand ... Nagaland Odisha Punjab Rajasthan Sikkim Tamil Nadu Telangana Tripura Uttrakhand Uttar Pradesh
Item
Primary 7.169590e+05 5.326697e+06 5.326697e+06 8.019997e+06 6.400817e+06 3.121290e+05 1.588719e+07 8.040424e+06 1.548366e+06 5.248354e+06 ... 6.161780e+05 9.009306e+06 9.296070e+06 1.911378e+07 1.387760e+05 1.332977e+07 9.133354e+06 9.422160e+05 1.845972e+06 2.599926e+07
Secondary 2.874890e+05 4.033091e+06 4.033091e+06 5.984896e+06 8.238886e+06 1.547536e+06 3.302354e+07 1.256141e+07 4.119162e+06 6.241471e+06 ... 2.123610e+05 8.989693e+06 7.904914e+06 1.302879e+07 8.452530e+05 3.284189e+07 9.924001e+06 4.843930e+05 7.642865e+06 2.554872e+07
Tertiary 6.318440e+05 9.307109e+06 9.307109e+06 2.217997e+07 7.588778e+06 1.738217e+06 3.022038e+07 1.922657e+07 4.133326e+06 8.133341e+06 ... 9.929560e+05 1.225626e+07 1.671780e+07 2.601581e+07 4.831030e+05 5.334379e+07 2.847141e+07 1.484709e+06 5.587975e+06 4.596896e+07
Gross State Domestic Product 1.676119e+06 1.980980e+07 1.980980e+07 3.739199e+07 2.349818e+07 4.063307e+06 8.950273e+07 4.374621e+07 1.043688e+07 2.171072e+07 ... 1.841424e+06 3.219709e+07 3.680109e+07 6.121945e+07 1.520933e+06 1.092564e+08 5.111776e+07 2.966662e+06 1.619853e+07 1.043371e+08
primary_percentage 4.277495e+01 2.688920e+01 2.688920e+01 2.144844e+01 2.723963e+01 7.681649e+00 1.775051e+01 1.837971e+01 1.483553e+01 2.417402e+01 ... 3.346204e+01 2.798174e+01 2.526031e+01 3.122175e+01 9.124399e+00 1.220045e+01 1.786728e+01 3.176014e+01 1.139592e+01 2.491851e+01
secondary_percentage 1.715206e+01 2.035907e+01 2.035907e+01 1.600582e+01 3.506180e+01 3.808563e+01 3.689668e+01 2.871429e+01 3.946737e+01 2.874834e+01 ... 1.153243e+01 2.792082e+01 2.148011e+01 2.128212e+01 5.557464e+01 3.005947e+01 1.941400e+01 1.632788e+01 4.718246e+01 2.448671e+01
tertiary_percentage 3.769685e+01 4.698235e+01 4.698235e+01 5.931744e+01 3.229517e+01 4.277838e+01 3.376476e+01 4.395025e+01 3.960308e+01 3.746233e+01 ... 5.392327e+01 3.806635e+01 4.542747e+01 4.249599e+01 3.176360e+01 4.882442e+01 5.569768e+01 5.004645e+01 3.449681e+01 4.405811e+01

7 rows × 26 columns

In [83]:
# Transposing the dataframe for better readability

pst = pst.T
pst = pst.sort_values('Gross State Domestic Product')
pst
Out[83]:
Item Primary Secondary Tertiary Gross State Domestic Product primary_percentage secondary_percentage tertiary_percentage
Mizoram 225598.0 270072.0 637619.0 1155933.0 19.516529 23.363984 55.160550
Sikkim 138776.0 845253.0 483103.0 1520933.0 9.124399 55.574637 31.763595
Andhra Pradesh 716959.0 287489.0 631844.0 1676119.0 42.774946 17.152064 37.696846
Nagaland 616178.0 212361.0 992956.0 1841424.0 33.462038 11.532434 53.923268
Meghalaya 451050.0 637942.0 1200655.0 2440807.0 18.479544 26.136520 49.190903
Tripura 942216.0 484393.0 1484709.0 2966662.0 31.760140 16.327880 50.046450
Goa 312129.0 1547536.0 1738217.0 4063307.0 7.681649 38.085628 42.778382
Himachal Pradesh 1548366.0 4119162.0 4133326.0 10436879.0 14.835527 39.467373 39.603084
Uttrakhand 1845972.0 7642865.0 5587975.0 16198529.0 11.395924 47.182463 34.496805
Assam 5326697.0 4033091.0 9307109.0 19809800.0 26.889201 20.359070 46.982347
Arunachal Pradesh 5326697.0 4033091.0 9307109.0 19809800.0 26.889201 20.359070 46.982347
Jharkhand 5248354.0 6241471.0 8133341.0 21710718.0 24.174023 28.748340 37.462331
Chhatisgarh 6400817.0 8238886.0 7588778.0 23498180.0 27.239629 35.061805 32.295173
Odisha 9009306.0 8989693.0 12256258.0 32197092.0 27.981738 27.920823 38.066351
Punjab 9296070.0 7904914.0 16717805.0 36801089.0 25.260312 21.480109 45.427474
Bihar 8019997.0 5984896.0 22179969.0 37391988.0 21.448437 16.005825 59.317437
Haryana 8040424.0 12561411.0 19226568.0 43746207.0 18.379705 28.714286 43.950252
Madhya Pradesh 17854020.0 10044889.0 18117360.0 48198169.0 37.042942 20.840810 37.589312
Telangana 9133354.0 9924001.0 28471410.0 51117765.0 17.867280 19.413996 55.697682
Kerala 6489442.0 12070040.0 29673778.0 52600230.0 12.337288 22.946744 56.413780
Rajasthan 19113780.0 13028794.0 26015812.0 61219447.0 31.221746 21.282116 42.495993
Gujarat 15887187.0 33023538.0 30220377.0 89502727.0 17.750506 36.896684 33.764756
Karnataka 12066304.0 20484404.0 50490630.0 92178806.0 13.090107 22.222466 54.774663
Uttar Pradesh 25999255.0 25548724.0 45968959.0 104337115.0 24.918511 24.486707 44.058108
Tamil Nadu 13329774.0 32841892.0 53343788.0 109256373.0 12.200454 30.059475 48.824418
Maharashtra 21758383.0 47445207.0 88631076.0 179212165.0 12.141131 26.474323 49.455948
In [84]:
plt.figure(figsize=(12,10), dpi =600)

bars1 = pst['primary_percentage']
bars2 = pst['secondary_percentage']
bars3 = pst['tertiary_percentage']
 
legends = ['Primary %', 'Secondary %', 'Tertiary %']

bars = np.add(bars1, bars2).tolist()
 
r = np.arange(0,len(pst.index))
 
names = pst.index
barWidth = 1
 
# Create red bars
plt.bar(r, bars1, color='red', edgecolor='white')
# Create green bars (middle), on top of the firs ones
plt.bar(r, bars2, bottom=bars1, color='green', edgecolor='white')
# Create blue bars (top)
plt.bar(r, bars3, bottom=bars, color='blue', edgecolor='white')
 
plt.xticks(r, names,rotation=90)
plt.xlabel('States',fontsize=12)
plt.ylabel('Percentage contribution to GDP',fontsize=12)
plt.title('Percentage contribution of the Primary, Secondary and Tertiary sectors as a percentage of the total GDP for all the states')

plt.legend(legends)

plt.tight_layout()
In [85]:
gdp_per_capita
Out[85]:
GDP per capita
Bihar 33954
Uttar Pradesh 49450
Arunachal Pradesh 60621
Assam 60621
Jharkhand 62091
Madhya Pradesh 62989
Odisha 73979
Meghalaya 76228.0
Tripura 77358.0
Rajasthan 84837
Chhatisgarh 86860
Nagaland 89607
Mizoram 97687
Andhra Pradesh 112718
Punjab 126606
Telangana 139035
Gujarat 141263.0
Karnataka 145141
Tamil Nadu 146503.0
Himachal Pradesh 147330
Maharashtra 152853
Uttrakhand 153076.0
Kerala 154778.0
Haryana 164077.0
Sikkim 240274
Goa 271793
In [86]:
# States between the 85th and 100th percentile

C1 = gdp_per_capita[gdp_per_capita['GDP per capita'] > gdp_per_capita['GDP per capita'].quantile(0.85)]
C1
Out[86]:
GDP per capita
Kerala 154778.0
Haryana 164077.0
Sikkim 240274
Goa 271793
In [87]:
# States between the 50th and 85th percentile

C2 = gdp_per_capita[(gdp_per_capita['GDP per capita'] > gdp_per_capita['GDP per capita'].quantile(0.50)) & (gdp_per_capita['GDP per capita'] < gdp_per_capita['GDP per capita'].quantile(0.85))]
C2
Out[87]:
GDP per capita
Andhra Pradesh 112718
Punjab 126606
Telangana 139035
Gujarat 141263.0
Karnataka 145141
Tamil Nadu 146503.0
Himachal Pradesh 147330
Maharashtra 152853
Uttrakhand 153076.0
In [88]:
# States between the 20th and 50th percentile

C3 = gdp_per_capita[(gdp_per_capita['GDP per capita'] > gdp_per_capita['GDP per capita'].quantile(0.20)) & (gdp_per_capita['GDP per capita'] <= gdp_per_capita['GDP per capita'].quantile(0.50))]
C3
Out[88]:
GDP per capita
Odisha 73979
Meghalaya 76228.0
Tripura 77358.0
Rajasthan 84837
Chhatisgarh 86860
Nagaland 89607
Mizoram 97687
In [89]:
# States below the 20th percentile

C4 = gdp_per_capita[gdp_per_capita['GDP per capita'] < gdp_per_capita['GDP per capita'].quantile(0.20)]
C4
Out[89]:
GDP per capita
Bihar 33954
Uttar Pradesh 49450
Arunachal Pradesh 60621
Assam 60621
Jharkhand 62091
In [90]:
C1_df = df_final[['S.No.','Item']+list(states for states in C1.index)]
C2_df = df_final[['S.No.','Item']+list(states for states in C2.index)]
C3_df = df_final[['S.No.','Item']+list(states for states in C3.index)]
C4_df = df_final[['S.No.','Item']+list(states for states in C4.index)]
In [91]:
C1_df = C1_df.iloc[[0,5,7,8,9,11,14,22,23,24,25,30,32]]
C2_df = C2_df.iloc[[0,5,7,8,9,11,14,22,23,24,25,30,32]]
C3_df = C3_df.iloc[[0,5,7,8,9,11,14,22,23,24,25,30,32]]
C4_df = C4_df.iloc[[0,5,7,8,9,11,14,22,23,24,25,30,32]]
In [92]:
C1_df
Out[92]:
S.No. Item Kerala Haryana Sikkim Goa
0 1 Agriculture, forestry and fishing 5930617.0 8015238.0 137447 308507
5 2 Mining and quarrying 558824.0 25186.0 1329 3622
7 3 Manufacturing 4273567.0 7756921.0 550697 1177608
8 4 Electricity, gas, water supply & other utility... 482470.0 1101919.0 212499 204110
9 5 Construction 7314003.0 3702571.0 82058 165819
11 6 Trade, repair, hotels and restaurants 8557345.0 4986319.0 70568 380927
14 7 Transport, storage, communication & services r... 4020934.0 2560623.0 47347 189656
22 8 Financial services 2010306.0 1671486.0 21079 233618
23 9 Real estate, ownership of dwelling & professio... 7287633.0 6970183.0 75330 407099
24 10 Public administration 2068915.0 1036377.0 119514 346486
25 11 Other services 5728645.0 2001581.0 149265 180431
30 15 Gross State Domestic Product 52600230.0 43746207.0 1520933 4063307
32 17 Per Capita GSDP (Rs.) 154778.0 164077.0 240274 271793
In [93]:
# Creating the column for Total values for all sub-sectors for all the states and the column for the percentage contribution
# to the total GSDP by each of the sub-sectors for all the states

C1_df['Total for all states'] = C1_df['Kerala']+C1_df['Haryana']+C1_df['Sikkim']+C1_df['Goa']
C1_df['Percentage of Total GDP'] = C1_df['Total for all states']/C1_df['Total for all states'][11] * 100
C1_df
Out[93]:
S.No. Item Kerala Haryana Sikkim Goa Total for all states Percentage of Total GDP
0 1 Agriculture, forestry and fishing 5930617.0 8015238.0 137447 308507 14391809.0 102.834194
5 2 Mining and quarrying 558824.0 25186.0 1329 3622 588961.0 4.208319
7 3 Manufacturing 4273567.0 7756921.0 550697 1177608 13758793.0 98.311087
8 4 Electricity, gas, water supply & other utility... 482470.0 1101919.0 212499 204110 2000998.0 14.297787
9 5 Construction 7314003.0 3702571.0 82058 165819 11264451.0 80.488196
11 6 Trade, repair, hotels and restaurants 8557345.0 4986319.0 70568 380927 13995159.0 100.000000
14 7 Transport, storage, communication & services r... 4020934.0 2560623.0 47347 189656 6818560.0 48.720847
22 8 Financial services 2010306.0 1671486.0 21079 233618 3936489.0 28.127505
23 9 Real estate, ownership of dwelling & professio... 7287633.0 6970183.0 75330 407099 14740245.0 105.323884
24 10 Public administration 2068915.0 1036377.0 119514 346486 3571292.0 25.518052
25 11 Other services 5728645.0 2001581.0 149265 180431 8059922.0 57.590785
30 15 Gross State Domestic Product 52600230.0 43746207.0 1520933 4063307 101930677.0 728.328110
32 17 Per Capita GSDP (Rs.) 154778.0 164077.0 240274 271793 830922.0 5.937210
In [94]:
# Identifying the major sub-sectors contributing more to the GSDP  by finding the cumulative sum

C1_contributor = C1_df[['Item','Percentage of Total GDP']][:-2].sort_values(by='Percentage of Total GDP', ascending=False)
C1_contributor.reset_index(drop=True, inplace=True)
C1_contributor['Cumulative sum'] = C1_contributor['Percentage of Total GDP'].cumsum()
C1_contributor
Out[94]:
Item Percentage of Total GDP Cumulative sum
0 Real estate, ownership of dwelling & professio... 105.323884 105.323884
1 Agriculture, forestry and fishing 102.834194 208.158078
2 Trade, repair, hotels and restaurants 100.000000 308.158078
3 Manufacturing 98.311087 406.469166
4 Construction 80.488196 486.957361
5 Other services 57.590785 544.548147
6 Transport, storage, communication & services r... 48.720847 593.268994
7 Financial services 28.127505 621.396499
8 Public administration 25.518052 646.914551
9 Electricity, gas, water supply & other utility... 14.297787 661.212338
10 Mining and quarrying 4.208319 665.420657
In [95]:
plt.figure(figsize=(6,4), dpi=600)
sns.barplot(y=C1_contributor['Item'], x = C1_contributor['Percentage of Total GDP'], palette='inferno')
plt.xlabel("Percentage of Total GSDP for C1 States")
plt.ylabel('Sub-sectors')
plt.title('Percentage of Total GSDP for C1 States vs Sub-sectors')
plt.savefig("Percentage of Total GSDP for C1 States vs Sub-sectors.png", bbox_inches='tight', dpi=600)

plt.show()
In [96]:
C2_df['Total for all states']=list(C2_df[list(states for states in C2_df.columns)[2:]].sum(axis=1))
C2_df['Percentage of Total GDP'] = C2_df['Total for all states']/C2_df['Total for all states'][11] * 100
C2_contributor = C2_df[['Item','Percentage of Total GDP']][:-2].sort_values(by='Percentage of Total GDP', ascending=False)
C2_contributor.reset_index(drop=True, inplace=True)
C2_contributor['Cumulative sum'] = C2_contributor['Percentage of Total GDP'].cumsum()
C2_contributor
Out[96]:
Item Percentage of Total GDP Cumulative sum
0 Manufacturing 178.312474 178.312474
1 Real estate, ownership of dwelling & professio... 150.429716 328.742190
2 Agriculture, forestry and fishing 122.812573 451.554763
3 Trade, repair, hotels and restaurants 100.000000 551.554763
4 Construction 66.385234 617.939997
5 Financial services 59.270602 677.210599
6 Other services 59.200409 736.411008
7 Transport, storage, communication & services r... 55.111749 791.522757
8 Public administration 30.320314 821.843071
9 Electricity, gas, water supply & other utility... 22.572885 844.415956
10 Mining and quarrying 16.939155 861.355111
In [97]:
plt.figure(figsize=(6,4), dpi=600)
sns.barplot(y=C2_contributor['Item'], x = C2_contributor['Percentage of Total GDP'],palette='hot')
plt.xlabel("Percentage of Total GSDP for C2 States")
plt.ylabel('Sub-sectors')
plt.title('Percentage of Total GSDP for C2 States vs Sub-sectors')
plt.show()
In [98]:
C3_df['Total for all states']=list(C3_df[list(states for states in C3_df.columns)[2:]].sum(axis=1))
C3_df['Percentage of Total GDP'] = C3_df['Total for all states']/C3_df['Total for all states'][11] * 100
C3_contributor = C3_df[['Item','Percentage of Total GDP']][:-2].sort_values(by='Percentage of Total GDP', ascending=False)
C3_contributor.reset_index(drop=True, inplace=True)
C3_contributor['Cumulative sum'] = C3_contributor['Percentage of Total GDP'].cumsum()
C3_contributor
Out[98]:
Item Percentage of Total GDP Cumulative sum
0 Agriculture, forestry and fishing 210.633751 210.633751
1 Manufacturing 133.462853 344.096604
2 Trade, repair, hotels and restaurants 100.000000 444.096604
3 Real estate, ownership of dwelling & professio... 90.829939 534.926543
4 Construction 84.868654 619.795197
5 Mining and quarrying 71.868555 691.663752
6 Other services 66.093515 757.757267
7 Transport, storage, communication & services r... 54.984246 812.741512
8 Public administration 44.007962 856.749475
9 Financial services 29.706348 886.455823
10 Electricity, gas, water supply & other utility... 26.537574 912.993397
In [99]:
plt.figure(figsize=(6,4), dpi=600)
sns.barplot(y=C3_contributor['Item'], x = C3_contributor['Percentage of Total GDP'], palette='autumn')
plt.xlabel("Percentage of Total GSDP for C3 States")
plt.ylabel('Sub-sectors')
plt.title('Percentage of Total GSDP for C3 States vs Sub-sectors')

plt.show()
In [100]:
C4_df['Total for all states']=list(C4_df[list(states for states in C4_df.columns)[2:]].sum(axis=1))
C4_df['Percentage of Total GDP'] = C4_df['Total for all states']/C4_df['Total for all states'][11] * 100
C4_contributor = C4_df[['Item','Percentage of Total GDP']][:-2].sort_values(by='Percentage of Total GDP', ascending=False)
C4_contributor.reset_index(drop=True, inplace=True)
C4_contributor['Cumulative sum'] = C4_contributor['Percentage of Total GDP'].cumsum()
C4_contributor
Out[100]:
Item Percentage of Total GDP Cumulative sum
0 Agriculture, forestry and fishing 176.939764 176.939764
1 Trade, repair, hotels and restaurants 100.000000 276.939764
2 Real estate, ownership of dwelling & professio... 91.627222 368.566986
3 Manufacturing 90.826828 459.393814
4 Construction 80.330587 539.724401
5 Other services 58.704964 598.429365
6 Transport, storage, communication & services r... 57.988309 656.417674
7 Public administration 48.552186 704.969860
8 Financial services 24.984784 729.954644
9 Mining and quarrying 23.939185 753.893829
10 Electricity, gas, water supply & other utility... 13.304970 767.198799
In [101]:
plt.figure(figsize=(6,4), dpi=600)
sns.barplot(y=C4_contributor['Item'], x = C4_contributor['Percentage of Total GDP'], palette='spring')
plt.xlabel("Percentage of Total GSDP for C4 States")
plt.ylabel('Sub-sectors')
plt.title('Percentage of Total GSDP for C4 States vs Sub-sectors')

plt.show()
In [102]:
# Reading the data and selecting the data for the year 2014-14 and the education level for Primary, Upper Primary and Secondary

data2 = pd.read_csv("D:/Application of software/rs_session243_au570_1.1.csv")
data2 = data2[['Level of Education - State','Primary - 2014-2015.1','Upper Primary - 2014-2015','Secondary - 2014-2015']]
data2
Out[102]:
Level of Education - State Primary - 2014-2015.1 Upper Primary - 2014-2015 Secondary - 2014-2015
0 A & N Islands 0.51 1.69 9.870
1 Andhra Pradesh 6.72 5.20 15.710
2 Arunachal Pradesh 10.82 6.71 17.110
3 Assam 15.36 10.51 27.060
4 Bihar NaN 4.08 25.900
5 Chandigarh NaN 0.44 NaN
6 Chhatisgarh 2.91 5.85 21.260
7 Dadra & Nagar Haveli 1.47 4.02 16.770
8 Daman & Diu 1.11 3.11 32.270
9 Delhi NaN 0.76 11.810
10 Goa 0.73 0.07 11.150
11 Gujarat 0.89 6.41 25.040
12 Haryana 5.61 5.81 15.890
13 Himachal Pradesh 0.64 0.87 6.070
14 Jammu and Kashmir 6.79 5.44 17.280
15 Jharkhand 5.48 8.99 24.000
16 Karnataka 2.02 3.85 26.180
17 Kerala NaN NaN 12.320
18 Lakshadweep NaN 2.78 6.763
19 Madhya Pradesh 6.59 9.20 24.770
20 Maharashtra 1.26 1.79 12.870
21 Manipur 9.66 4.20 14.380
22 Meghalaya 9.46 6.52 20.520
23 Mizoram 10.10 4.78 21.880
24 Nagaland 5.61 7.92 18.230
25 Odisha 2.86 3.81 29.560
26 Puducherry 0.37 0.56 12.190
27 Punjab 3.05 3.22 8.860
28 Rajasthan 5.02 3.07 13.480
29 Sikkim 2.27 1.57 15.890
30 Tamil Nadu NaN NaN 8.100
31 Telangana 2.08 2.30 15.530
32 Tripura 1.28 1.99 28.420
33 Uttar Pradesh 8.58 2.70 10.220
34 Uttrakhand 4.04 1.19 10.400
35 West Bengal 1.47 4.30 17.800
36 All India 4.13 4.03 17.060
In [103]:
# Dropping rows of data which we don not need like Union Territories and for which we don't have GDP per-capita available like West Bengal

data2 =  data2.drop([0,5,7,8,9,14,18,26,35,36])
data2 = data2.reset_index(drop = True)
data2=data2.rename(columns={'Level of Education - State': 'State'})
In [104]:
# Necessary transformation like resetting the index and renaming the column name for merging with another dataframe

states_gdp_per_capita = gdp_per_capita.reset_index()
states_gdp_per_capita=states_gdp_per_capita.rename(columns={'index':'State'})
In [105]:
data2_final = pd.merge(data2,states_gdp_per_capita,how='left',on=['State'])
In [106]:
data2_final = data2_final.rename(columns={'State':'Level of education - State'})
In [107]:
# Final dataframe having the education level dropout rates for all the states and the GDP per capita

data2_final
Out[107]:
Level of education - State Primary - 2014-2015.1 Upper Primary - 2014-2015 Secondary - 2014-2015 GDP per capita
0 Andhra Pradesh 6.72 5.20 15.71 112718
1 Arunachal Pradesh 10.82 6.71 17.11 60621
2 Assam 15.36 10.51 27.06 60621
3 Bihar NaN 4.08 25.90 33954
4 Chhatisgarh 2.91 5.85 21.26 86860
5 Goa 0.73 0.07 11.15 271793
6 Gujarat 0.89 6.41 25.04 141263.0
7 Haryana 5.61 5.81 15.89 164077.0
8 Himachal Pradesh 0.64 0.87 6.07 147330
9 Jharkhand 5.48 8.99 24.00 62091
10 Karnataka 2.02 3.85 26.18 145141
11 Kerala NaN NaN 12.32 154778.0
12 Madhya Pradesh 6.59 9.20 24.77 62989
13 Maharashtra 1.26 1.79 12.87 152853
14 Manipur 9.66 4.20 14.38 NaN
15 Meghalaya 9.46 6.52 20.52 76228.0
16 Mizoram 10.10 4.78 21.88 97687
17 Nagaland 5.61 7.92 18.23 89607
18 Odisha 2.86 3.81 29.56 73979
19 Punjab 3.05 3.22 8.86 126606
20 Rajasthan 5.02 3.07 13.48 84837
21 Sikkim 2.27 1.57 15.89 240274
22 Tamil Nadu NaN NaN 8.10 146503.0
23 Telangana 2.08 2.30 15.53 139035
24 Tripura 1.28 1.99 28.42 77358.0
25 Uttar Pradesh 8.58 2.70 10.22 49450
26 Uttrakhand 4.04 1.19 10.40 153076.0
In [108]:
data2_final.describe()
Out[108]:
Primary - 2014-2015.1 Upper Primary - 2014-2015 Secondary - 2014-2015
count 24.000000 25.000000 27.000000
mean 5.126667 4.504400 17.807407
std 3.890927 2.781644 6.845367
min 0.640000 0.070000 6.070000
25% 2.065000 2.300000 12.595000
50% 4.530000 4.080000 15.890000
75% 7.185000 6.410000 24.385000
max 15.360000 10.510000 29.560000
In [109]:
import seaborn as sns
import matplotlib.pyplot as plt
In [112]:
# Primary - 2014-2015.1

plt.figure(figsize=(8,6), dpi= 600)

sns.scatterplot(y=data2_final['GDP per capita'],x=data2_final['Primary - 2014-2015.1'])
plt.xlabel('Primary Drop out rate')
plt.ylabel('Per capita GDP')
plt.title('Per capita GDP vs Primary Drop out rate')
plt.show()
In [113]:
# Upper Primary - 2014-2015

plt.figure(figsize=(8,6), dpi= 600)

sns.scatterplot(y=data2_final['GDP per capita'],x=data2_final['Upper Primary - 2014-2015'])
plt.xlabel('Upper Primary Drop out rate')
plt.ylabel('Per capita GDP')
plt.title('Per capita GDP vs Upper Primary Drop out rate')
plt.show()
In [114]:
# Secondary - 2014-2015

plt.figure(figsize=(8,6), dpi= 100)

sns.scatterplot(y=data2_final['GDP per capita'],x=data2_final['Secondary - 2014-2015'])
plt.xlabel('Secondary Drop out rate')
plt.ylabel('Per capita GDP')
plt.title('Per capita GDP vs Secondary Drop out rate')
plt.show()
In [ ]: